load("data/simulations/nullmodel_grid_ll_19.Rda")
R <- Q %>% filter(p1==0.25,q1==0.5,devfrac==0.0) %>% select(p2,q2,lr) %>% mutate(x=p2,y=q2)
fun.1 <- function(x,off) x-off
library(RColorBrewer)
D0 <- R %>% mutate(llr=pmax(-100,-0.5*lr))
mtx <- acast(D0, x~y, value.var="llr")
x <- as.numeric(dimnames(mtx)[[1]])
y <- as.numeric(dimnames(mtx)[[2]])
dimnames(mtx) <- NULL
D1 <- bind_cols(x=x,y=fun.1(x,0.25),z=0) %>% filter(y <= 1.0, y >= 0)
mtx2 <- acast(D1, x~y, value.var="z")
dimnames(mtx2) <- NULL
fig <- plot_ly(x=x, y=y,z = mtx) %>% add_surface( showscale=FALSE, opacity = 0.8,
contours = list(
z = list(
show=TRUE,
usecolormap=TRUE,
highlightcolor="#ff0000",
project=list(z=TRUE)
)
)
)
fig <- fig %>% add_trace(x=D1$x, y=D1$y, z=1, type = "scatter3d",
mode = "lines",
line = list(color = "#D16103", width = 10))
fig <- fig %>% add_trace(x=D1$x, y=D1$y, z=-99, type = "scatter3d",
mode = "lines",
line = list(color = "gray", width = 20, dash = 'dot'))
zoom <- 1
fig <- fig %>% layout(
showlegend=FALSE,
scene = list(
camera=list(
eye = list(x=1.25/zoom, y=-1.25/zoom, z=1.5/zoom)
),
zaxis = list(tickfont=list(size=14),title=""),
yaxis = list(autotick = F, tickmode = "array", tickvals = c(0.25,0.5,0.75), tickfont=list(size=14),title=""),
xaxis = list(title="",tickfont=list(size=14))
)
)
fig
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
json<-plotly_json(fig,jsonedit = FALSE)
write(json,'figures/ll_landscape_top.json')
fig <- fig %>% layout(
showlegend=FALSE,
scene = list(
camera=list(
eye = list(x=4.5/3.1, y=5/3.1, z=2/3.1)
),
zaxis = list(tickfont=list(size=14),title=""),
yaxis = list(autotick = F, tickmode = "array", tickvals = c(0.25,0.5,0.75), tickfont=list(size=14),title=""),
xaxis = list(title="",tickfont=list(size=14))
)
)
fig
#orca graph test.json --width 800 --height 600 -f svg -o ll_landscape.svg
N=20000
size=100
p1=0.9
q1=0.15
X = bind_cols(C=rbinom(N, size=size, prob=p1), Counts="5modC")
Y = bind_cols(C=rbinom(N, size=size, prob=q1), Counts="5mC")
D = bind_cols(C=X$C-Y$C, Counts="5hmC")
R <- bind_rows(X,Y,D)
A <- data.frame(x=0:size,y=dbinom(0:size,size,p1)*N,Counts="5modC")
B <- data.frame(x=0:size,y=dbinom(0:size,size,q1)*N,Counts="5mC")
C <- data.frame(x=-size:size, y=BDN_vec(-size:size,size,size,p1,q1)*N,Counts="5hmC")
curve <- data.frame(x1 = 57, x2 = 70, y1 = 1700, y2 = 1300, Counts="5hmC")
p0 <- R %>% ggplot(.,aes(x = C,fill=Counts,color=Counts)) +
labs(x="number of successes",y="counts (bars) / scaled prob. (dashed lines)") +
geom_vline(xintercept=p1*size,color="black",linetype="dotted") +
geom_vline(xintercept=(p1-q1)*size,color="black",linetype="dotted") +
geom_vline(xintercept=q1*size,color="black",linetype="dotted") +
geom_histogram(binwidth=1,position="identity",boundary=0.5,color="white",alpha=0.3) +
geom_line(data=A,aes(x=x,y=y,color=Counts),size=1.1,linetype=2,alpha=0.9) +
geom_line(data=B,aes(x=x,y=y,color=Counts),size=1.1,linetype=2,alpha=0.9) +
geom_line(data=C[101:200,],aes(x=x,y=y,color=Counts),size=1.1,linetype="dashed",alpha=0.8) +
annotate("text",label="Binomial difference", x=58, y=1800, family = "CM Sans", size=4) +
geom_curve(aes(x = x1, y = y1, xend = x2, yend = y2), size=1.2, lineend="round",
data = curve, arrow = arrow(length = unit(0.03, "npc"))) +
theme_bw() + theme(legend.position = c(0.5, 0.25),
strip.background = element_rect(colour="white", fill="#FFFFFF"),
text = element_text(family="CM Sans", size=12),
plot.title = element_text(hjust = 0.2, vjust=-1, size=10),
panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
scale_fill_manual(values=custom.col) + scale_color_manual(values=custom.col)
ggsave(filename = "figures/bdn_example.pdf", plot = p0, device = "pdf")
## Saving 4 x 3 in image
ggsave(filename = "figures/bdn_example.pdf", plot = p0, device = "svg")
## Saving 4 x 3 in image
ggsave(filename = "figures/bdn_example.png", plot = p0, device = "png", family="CM Sans")
## Saving 4 x 3 in image
par(family = "CM Sans")
p0
#grob1 <- rasterGrob(readPNG("schematic.png"))
grob2 <- rasterGrob(readPNG("figures/ll_landscape_top.png"))
#fig1 <- ggarrange(grob1, NULL, p0, grob2, nrow = 1, widths=c(4,0.2,3,3),labels=c("a.","", "b.","c."),label.x=c(0.02,0.0,-0.05,0.0), font.label = list(family="CM Sans"))
fig1 <- ggarrange( p0, grob2, nrow = 1, widths=c(4,3),labels=c("a.", "b."),label.x=c(0.02,0.02), font.label = list(family="CM Sans"))
fig1
ggsave(filename = "figures/fig1.png", plot = fig1, device = "png")
## Saving 11 x 4.5 in image
ggsave(filename = "figures/fig1.noembed.pdf", plot = fig1, device = "pdf")
## Saving 11 x 4.5 in image
ggsave(filename = "figures/fig1.svg", plot = fig1, device = "svg")
## Saving 11 x 4.5 in image
embed_fonts("figures/fig1.noembed.pdf", outfile="figures/fig1.pdf")
For both tests, data is simulated with uniformly randomly drawn modification and methylation levels. For the test on absence of hydroxymethylation, the delta is set to zero. For the test on differential hydroxymethylation delta is random within the bounds of the modification levels.
load("data/simulations/nullmodel_random_deltanull_18.Rda")
R <- Q %>% mutate(coverage=factor(coverage), replicates=factor(replicates)) %>% filter(coverage %in% c(30,40,100), replicates==5) %>% dplyr::group_by(coverage) %>% slice_tail(n=10000) %>% select(replicates,coverage,p_C2,lr)
n <- nrow(R)
cov.labs <- c("cov. 30", "cov. 40", "cov. 100")
names(cov.labs) <- c("30", "40", "100")
p <- R %>% group_by(replicates) %>%
do(distplots= ggplot(.,aes(x = p_C2, color=coverage, fill=coverage)) +
geom_histogram(binwidth = 0.1, boundary=0, alpha=0.6) +
geom_hline(yintercept=n*0.1*0.33, linetype="dashed",color="black") +
labs(x = "p-value") +
coord_cartesian(ylim = c(0, n/3*0.2)) +
scale_color_manual(values=custom.col) +
scale_fill_manual(values=custom.col) + facet_grid(coverage ~ ., labeller = labeller(coverage = cov.labs)) +
scale_y_continuous(breaks=c(0, 500, 1000, 1500, 2000),labels=c("0","500","1K","1.5K","2K"))+
theme(plot.caption = element_text(hjust = 0, face= "italic"),
plot.title.position = "plot",
strip.background = element_rect(colour="white", fill="#FFFFFF"),
text = element_text(family = "CM Sans", size=11),
plot.title = element_text(hjust = 0.47, size=11),
legend.position = "none", panel.grid.major = element_blank(), panel.grid.minor = element_blank()
)
)
p1 <- p$distplots[[1]]
p1
load("data/simulations/nullmodel_random_deltanull_18.Rda")
R <- Q %>% mutate(coverage=factor(coverage), replicates=factor(replicates)) %>% filter(coverage %in% c(30,40,100), replicates==5) %>% dplyr::group_by(coverage) %>% slice_tail(n=10000)
custom.col <- c( "#C4961A", "#D16103", "#4E84C4", "#FFDB6D", "#F4EDCA",
"#C3D7A4", "#52854C", "#4E84C4", "#293352")
p2 <- ggplot(R, aes(x = z_C1, color=coverage)) + geom_density(aes(fill=coverage), size=0.3, alpha=0.2) +
coord_cartesian(xlim =c(-3, 3), ylim=c(0.0,0.5)) +
geom_function( fun = dnorm, args = list(sd = 1), color="#000000",linetype="dashed", size=0.5) +
labs(x = "z score") +
scale_colour_manual(values=custom.col) +
scale_fill_manual(values=custom.col) +
theme(plot.caption = element_text(hjust = 0, face= "italic"),
plot.title.position = "plot",
strip.background = element_rect(colour="white", fill="#FFFFFF"),
text = element_text(family = "CM Sans", size=11),
plot.title = element_text(hjust = 0.2, vjust=-1, size=11),
legend.position = "none", panel.grid.major = element_blank(), panel.grid.minor = element_blank())
p2
load("data/simulations/nullmodel_random_deltanull_18.Rda")
R <- Q %>% mutate(coverage=factor(coverage), replicates=factor(replicates)) %>% filter(coverage %in% c(30,40,100), replicates==5) %>% dplyr::group_by(coverage) %>% slice_tail(n=10000)
custom.col <- c( "#C4961A", "#D16103", "#4E84C4", "#FFDB6D", "#F4EDCA",
"#C3D7A4", "#52854C", "#4E84C4", "#293352")
q <- R %>% dplyr::group_by(replicates) %>%
do(qqplots=ggplot(.,aes(sample = z_C1, colour=coverage)) +
geom_qq_band(bandType="pointwise", distribution = "norm", dparams = list(sd=1), fill="#f2f2f2", alpha=0.1) +
geom_qq(distribution = stats::qnorm, dparams = list(sd=1), alpha=0.5) +
geom_abline(color="#000000",linetype="dashed",size=1) +
labs(y = "sample quantiles (z)", x="Theoretical quantiles (norm, sd=1)", colour="mean cov.") +
coord_cartesian(xlim =c(-3, 3), ylim = c(-4, 4)) +
annotate("text", label=paste0("(",.$replicates[1]," replicates, n=10000)"), x=7,y=1,family = "CM Sans", color="black",size=4,alpha=0.6) +
scale_colour_manual(values=custom.col) +
annotation_custom(grob=ggplotGrob(p1),
ymin = -1, ymax=4, xmin=-3.1, xmax=-0.5) +
annotation_custom(grob=ggplotGrob(p2),
ymin = -4, ymax=-1.5, xmin=-1.0, xmax=1.3) +
theme(plot.caption = element_text(hjust = 0, face= "italic"),
plot.title.position = "plot",
strip.background = element_rect(colour="white", fill="#FFFFFF"),
text = element_text(family = "CM Sans", size=12),
plot.title = element_text(hjust = 0.2, vjust=-1, size=10),
legend.position = c(0.85, 0.2), panel.grid.major = element_blank(), panel.grid.minor = element_blank())
)
p3 <- q$qqplots[[1]]
p3
load("data/simulations/nullmodel_random_commondelta_19.Rda")
R <- Q %>% mutate(coverage=factor(coverage), replicates=factor(replicates)) %>% filter(coverage %in% c(30,40,100), replicates==5) %>% dplyr::group_by(coverage) %>% slice_tail(n=10000)
cov.labs <- c("cov. 30", "cov. 40", "cov. 100")
names(cov.labs) <- c("30", "40", "100")
p <- R %>% group_by(replicates) %>%
do(distplots= ggplot(.,aes(x = p, color=coverage, fill=coverage)) +
geom_histogram(binwidth = 0.1, boundary=0, alpha=0.6) +
geom_hline(yintercept=10000*0.1, linetype="dashed",color="black") +
labs(x = "p-value") +
coord_cartesian(ylim = c(0, 2000)) +
scale_color_manual(values=custom.col) +
scale_fill_manual(values=custom.col) + facet_grid(coverage ~ ., labeller = labeller(coverage = cov.labs)) +
scale_y_continuous(breaks=c(0, 500, 1000, 1500, 2000),labels=c("0","500","1K","1.5K","2K"))+
theme(plot.caption = element_text(hjust = 0, face= "italic"),
plot.title.position = "plot",
strip.background = element_rect(colour="white", fill="#FFFFFF"),
text = element_text(family = "CM Sans", size=11),
plot.title = element_text(hjust = 0.47, size=12),
legend.position = "none", panel.grid.major = element_blank(), panel.grid.minor = element_blank()
)
)
p4 <- p$distplots[[1]]
p4
load("data/simulations/nullmodel_random_commondelta_19.Rda")
R <- Q %>% mutate(coverage=factor(coverage), replicates=factor(replicates)) %>% filter(coverage %in% c(30,40,100), replicates=="5") %>% slice_tail(n=10000) %>% drop_na()
custom.col <- c( "#C4961A", "#D16103", "#4E84C4", "#FFDB6D", "#F4EDCA",
"#C3D7A4", "#52854C", "#4E84C4", "#293352")
p5 <- ggplot(R, aes(x = lr, color=coverage)) + geom_density(aes(fill=coverage), size=0.3, alpha=0.2) +
coord_cartesian(xlim =c(0, 7.5), ylim=c(0.0,0.85)) +
labs(x = "lambda") +
stat_function( fun = dchisq, args = list(df = 1), color="#000000",linetype="dashed", size=0.5) +
scale_colour_manual(values=custom.col) +
scale_fill_manual(values=custom.col) +
theme(plot.caption = element_text(hjust = 0, face= "italic"),
plot.title.position = "plot",
strip.background = element_rect(colour="white", fill="#FFFFFF"),
text = element_text(family = "CM Sans", size=11),
plot.title = element_text(hjust = 0.2, vjust=-1, size=11),
legend.position = "none", panel.grid.major = element_blank(), panel.grid.minor = element_blank())
p5
load("data/simulations/nullmodel_random_commondelta_19.Rda")
R <- Q %>% mutate(coverage=factor(coverage), replicates=factor(replicates)) %>% filter(coverage %in% c(30,40,100), replicates=="5") %>% slice_tail(n=10000)
custom.col <- c( "#C4961A", "#D16103", "#4E84C4", "#FFDB6D", "#F4EDCA",
"#C3D7A4", "#52854C", "#4E84C4", "#293352")
q <- R %>% dplyr::group_by(replicates) %>%
do(qqplots=ggplot(.,aes(sample = lr, colour=coverage)) +
geom_qq_band(bandType="pointwise", distribution = "chisq", dparams = list(df=1), fill="#f2f2f2", alpha=0.1) +
geom_qq(distribution = stats::qchisq, dparams = list(df=1), alpha=0.5) +
geom_abline(color="#000000",linetype="dashed",size=1) +
labs(y = "sample quantiles (lambda)", x="Theoretical quantiles (chisq, df=1)", colour="mean cov.") +
coord_cartesian(xlim =c(0, 12), ylim = c(0, 17)) +
annotate("text", label=paste0("(",.$replicates[1]," replicates, n=10000)"), x=6,y=1,family = "CM Sans", color="black",size=4,alpha=0.6) +
scale_colour_manual(values=custom.col) +
annotation_custom(grob=ggplotGrob(p4),
ymin = 5, ymax=17, xmin=-0.5, xmax=5) +
annotation_custom(grob=ggplotGrob(p5),
ymin = 12, ymax=17, xmin=5, xmax=9.5) +
theme(plot.caption = element_text(hjust = 0, face= "italic"),
plot.title.position = "plot",
strip.background = element_rect(colour="white", fill="#FFFFFF"),
text = element_text(family = "CM Sans", size=12),
plot.title = element_text(hjust = 0.2, vjust=-1, size=10),
legend.position = c(0.85, 0.2), panel.grid.major = element_blank(), panel.grid.minor = element_blank())
)
p6 <- q$qqplots[[1]]
p6
fig2 <- ggarrange(p3, p6, nrow = 1, widths=c(6,6),labels=c("a.","b."),label.x=c(0.02,0.02),vjust=1)
fig2
ggsave(filename = "figures/fig2.png", plot = fig2, device = "png")
## Saving 11 x 4.5 in image
ggsave(filename = "figures/fig2.svg", plot = fig2, device = "svg")
## Saving 11 x 4.5 in image
ggsave(filename = "figures/fig2.noembed.pdf", plot= fig2, device="pdf")
## Saving 11 x 4.5 in image
embed_fonts("figures/fig2.noembed.pdf", outfile="figures/fig2.pdf")
load("data/simulations/nullmodel_grid_19.Rda")
R <- Q %>% mutate(coverage=factor(coverage))
p7 <- R %>% dplyr::group_by(replicates, combination) %>% filter((p1==q1 & p1==p2 & q1==q2)) %>% drop_na() %>%
do(qqplots=ggplot(.,aes(sample = z_C1, colour=coverage)) +
geom_qq_band(bandType="pointwise", distribution = "norm", dparams = list(sd=1), fill="#f2f2f2", alpha=0.1) +
stat_qq_line(distribution = "norm", dparams = list(sd=1), colour = "gray") +
geom_qq(distribution = stats::qnorm, dparams = list(sd=1), alpha=0.5) +
geom_abline(color="black",linetype="dashed",size=1) +
labs(y = "Sample (z score)", x="Theoretical (norm, sd=1)", colour="mean coverage") +
coord_cartesian(xlim =c(-3, 3), ylim = c(-4, 4)) +
annotate("text", label=paste0("p1:",.$p1[1]," q1:",.$q1[1],"(reps:",.$replicates[1],")"),
x=0,y=4,family = "CM Sans", color="black",size=4,alpha=0.6) +
scale_colour_manual(values=custom.col) +
theme(plot.caption = element_text(hjust = 0, face= "italic"),
plot.title.position = "plot",
strip.background = element_rect(colour="white", fill="#FFFFFF"),
text = element_text(family = "CM Sans", size=8),
plot.title = element_text(hjust = 0.2, vjust=-1, size=10),
legend.position = c(0.85, 0.2)))
mylabels <- tolower(as.character(as.roman(1:length(p7$qqplots))))
sfig1a <-ggarrange(plotlist=p7$qqplots[c(1:6)],ncol=2,nrow=3,labels=mylabels[c(1:6)],common.legend = TRUE)
sfig1b <-ggarrange(plotlist=p7$qqplots[c(7:9)],ncol=2,nrow=3,labels=mylabels[c(7:12)],common.legend = TRUE)
ggexport(sfig1a, sfig1b, filename = "figures/sfig1.noembed.pdf", width=8.3*0.9, height=11.7*0.9)
## file saved to figures/sfig1.noembed.pdf
embed_fonts("figures/sfig1.noembed.pdf", outfile="figures/sfig1.pdf")
sfig1a
sfig1b
load("data/simulations/nullmodel_grid_19.Rda")
R <- Q %>% mutate(coverage=factor(coverage))
p8 <- R %>% dplyr::group_by(replicates, combination) %>% drop_na() %>%
do(qqplots=ggplot(.,aes(sample = lr, colour=coverage)) +
geom_qq_band(bandType="pointwise", distribution = "chisq", dparams = list(df=1), fill="#f2f2f2", alpha=0.1) +
stat_qq_line(distribution = "chisq", dparams = list(df=1), colour = "gray") +
geom_qq(distribution = stats::qchisq, dparams = list(df=1), alpha=0.5) +
geom_abline(color="black",linetype="dashed",size=1) +
labs(y = "Sample (lambda)", x="Theoretical (chisq, df=1)", colour= "mean coverage") +
coord_cartesian(xlim =c(0, 12), ylim = c(0, 17)) +
annotate("text", label=paste0("p1:",.$p1[1]," q1:",.$q1[1],"; p2:",.$p2[1]," q2:",.$q2[1]," (reps:",.$replicates[1],")"),
x=6,y=17,family = "CM Sans", color="black",size=4,alpha=0.6) +
scale_colour_manual(values=custom.col) +
theme(plot.caption = element_text(hjust = 0, face= "italic"),
plot.title.position = "plot",
strip.background = element_rect(colour="white", fill="#FFFFFF"),
text = element_text(family = "CM Sans", size=8),
plot.title = element_text(hjust = 0.2, vjust=-1, size=10),
legend.position = c(0.85, 0.2)))
mylabels <- tolower(as.character(as.roman(1:length(p8$qqplots))))
sfig2a <-ggarrange(plotlist=p8$qqplots[c(1:6)],ncol=2,nrow=3,labels=mylabels[c(1:6)],common.legend = TRUE)
sfig2b <-ggarrange(plotlist=p8$qqplots[c(7:12)],ncol=2,nrow=3,labels=mylabels[c(7:12)],common.legend = TRUE)
sfig2c <-ggarrange(plotlist=p8$qqplots[c(13:18)],ncol=2,nrow=3,labels=mylabels[c(13:18)],common.legend = TRUE)
sfig2d <-ggarrange(plotlist=p8$qqplots[c(19:24)],ncol=2,nrow=3,labels=mylabels[c(19:24)],common.legend = TRUE)
sfig2e <-ggarrange(plotlist=p8$qqplots[c(25:30)],ncol=2,nrow=3,labels=mylabels[c(25:30)],common.legend = TRUE)
sfig2f <-ggarrange(plotlist=p8$qqplots[c(31:36)],ncol=2,nrow=3,labels=mylabels[c(31:36)],common.legend = TRUE)
ggexport(sfig2a, sfig2b, sfig2c,sfig2d, sfig2e, sfig2f, filename = "figures/sfig2.noembed.pdf", width=8.3*0.9, height=11.7*0.9)
## file saved to figures/sfig2.noembed.pdf
embed_fonts("figures/sfig2.noembed.pdf", outfile="figures/sfig2.pdf")
sfig2a
load("data/simulations/nullmodel_grid_19.Rda")
reps <- c("5","10","20")
p1s <- c(0.10,0.50,0.90)
q1s <- c(0.10,0.50,0.90)
p2s <- c(0.10,0.50,0.90)
q2s <- c(0.10,0.50,0.90)
#alternative names for 20 combinations
comb.labs <- paste0("p1:",p1s," q1:",q1s)
names(comb.labs) <- paste(1:length(p1s))
#alternative names for coverages
cov.labs <- c("mean coverage 30", "mean coverage 40", "mean coverage 100", "mean coverage 200")
names(cov.labs) <- c("30", "40", "100","200")
p9 <- list()
count <- 0
#iterate over replicates
for(i in reps) {
#divide comtinations in four groups of 5
for(g in c(1)) {
count <- count + 1
comb.loc.labs <- paste0(comb.labs,paste0(" rep:",i))
names(comb.loc.labs) <- paste(1:length(p1s))
if(g==1) {
group = c("1","2","3")
}
R <- Q %>% mutate(hasll=ifelse(is.na(lr),F,T)) %>%
filter(replicates==i,coverage %in% c("30","40","100","200"),combination %in% group)
p9[[count]] <- R %>% ggplot(.,aes(x = p_C1)) + #patternbar(.,aes(x = pval)) +
geom_histogram(binwidth = 0.1, boundary=0, color = "black") +
geom_hline(yintercept=5000*0.1, linetype="dashed",color="#D16103") +
labs(x = "p-value") +
coord_cartesian(ylim = c(0, 1500)) +
scale_fill_manual(values=c("#888888","#777777")) +
theme(text = element_text(family = "CM Sans", size=10), legend.position = "none") +
facet_grid(combination ~ coverage,
labeller = labeller(combination = comb.loc.labs, coverage=cov.labs))
}
}
p9
## [[1]]
##
## [[2]]
##
## [[3]]
ggexport(p9, filename = "figures/sfig3.noembed.pdf", width=8.3*0.9, height=11.7*0.9)
## file saved to figures/sfig3.noembed.pdf
embed_fonts("figures/sfig3.noembed.pdf", outfile="figures/sfig3.pdf")
load("data/simulations/nullmodel_grid_19.Rda")
reps <- c("5","10","20")
p1s <- c(0.10,0.50,0.90,0.10,0.50, 0.50,0.50,0.90,0.90,0.90, 0.10,0.10,0.50,0.50,0.90, 0.90,0.50,0.90,1.00,0.80)
q1s <- c(0.10,0.50,0.90,0.00,0.45, 0.10,0.00,0.50,0.10,0.00, 0.10,0.10,0.50,0.50,0.50, 0.50,0.40,0.80,0.75,0.40)
p2s <- c(0.10,0.50,0.90,0.10,0.50, 0.50,0.50,0.90,0.90,0.90, 0.50,0.90,0.90,0.00,0.50, 0.40,0.20,0.40,0.25,0.60)
q2s <- c(0.10,0.50,0.90,0.00,0.45, 0.10,0.00,0.50,0.10,0.00, 0.50,0.90,0.90,0.00,0.10, 0.00,0.10,0.30,0.00,0.20)
#alternative names for 20 combinations
comb.labs <- paste0("p1:",p1s," q1:",q1s, " p1:", p2s, " q2:", q2s)
names(comb.labs) <- paste(1:length(p1s))
#alternative names for coverages
cov.labs <- c("mean coverage 30", "mean coverage 40", "mean coverage 100", "mean coverage 200")
names(cov.labs) <- c("30", "40", "100","200")
p10 <- list()
count <- 0
#iterate over replicates
for(i in reps) {
#divide comtinations in four groups of 5
for(g in c(1,2,3,4)) {
count <- count + 1
comb.loc.labs <- paste0(comb.labs,paste0(" rep:",i))
names(comb.loc.labs) <- paste(1:length(p1s))
if(g==1) {
group = c("1","2","3","4","5")
} else if(g==2) {
group = c("6", "7","8","9","10")
} else if(g==3) {
group = c("11","12","13","14","15")
} else {
group = c("16","17","18","19","20")
}
R <- Q %>% mutate(hasll=ifelse(is.na(lr),F,T)) %>% filter(replicates==i,coverage %in% c("30","40","100","200"),combination %in% group)
p10[[count]] <- R %>% ggplot(.,aes(x = p)) + #patternbar(.,aes(x = pval)) +
geom_histogram(binwidth = 0.1, boundary=0, color = "black") +
geom_hline(yintercept=5000*0.1, linetype="dashed",color="#D16103") +
labs(x = "p-value") +
coord_cartesian(ylim = c(0, 1500)) +
scale_fill_manual(values=c("#888888","#777777")) +
theme(text = element_text(family = "CM Sans", size=10), legend.position = "none") +
facet_grid(combination ~ coverage, labeller = labeller(combination = comb.loc.labs, coverage=cov.labs))
}
}
p10
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
##
## [[12]]
ggexport(p10, filename = "figures/sfig4.noembed.pdf", width=8.3*0.9, height=11.7*0.9)
## file saved to figures/sfig4.noembed.pdf
embed_fonts("figures/sfig4.noembed.pdf", outfile="figures/sfig4.pdf")
load("data/simulations/nullmodel_random_19.Rda")
precision = 2
R <- Q %>% select(delta1,d12_pll,d1_cil,d1_cir,replicates,coverage)
R <- R %>% mutate(center=signif(d12_pll-delta1,digits=precision), lo=signif(d1_cil-delta1,digits=precision), hi=signif(d1_cir-delta1,digits=precision))
R <- R %>% mutate(cover=ifelse((hi < 0 | lo > 0), F, T))
R <- R %>% dplyr::group_by(coverage,replicates) %>% dplyr::summarize(n=n(),hits=sum(cover==TRUE), misses=sum(cover==FALSE)) %>% mutate(cp=1-(misses/n)) %>% ungroup()
## `summarise()` regrouping output by 'coverage' (override with `.groups` argument)
stab1 <- R %>% mutate(coverage=as.factor(coverage)) %>% mutate(replicates=as.factor(replicates))
print(xtable(stab1, type = "latex"), file = "tables/stab1_ci_delta_table.tex")
stab1
## # A tibble: 15 x 6
## coverage replicates n hits misses cp
## <fct> <fct> <int> <int> <int> <dbl>
## 1 30 5 50000 47516 2484 0.950
## 2 30 10 50000 47409 2591 0.948
## 3 30 20 50000 47415 2585 0.948
## 4 40 5 50000 47473 2527 0.949
## 5 40 10 50000 47497 2503 0.950
## 6 40 20 50000 47504 2496 0.950
## 7 50 5 50000 47454 2546 0.949
## 8 50 10 50000 47503 2497 0.950
## 9 50 20 50000 47498 2502 0.950
## 10 100 5 50000 47447 2553 0.949
## 11 100 10 50000 47419 2581 0.948
## 12 100 20 50000 47406 2594 0.948
## 13 200 5 50000 47487 2513 0.950
## 14 200 10 50000 47477 2523 0.950
## 15 200 20 50000 47415 2585 0.948
load("data/simulations/nullmodel_random_19.Rda")
precision = 2
R <- Q %>% select(coverage,replicates,delta1,delta2,d0_pll,d0_lo,d0_hi) %>% mutate(dd = signif(delta1-delta2, digits=precision))
R <- R %>% mutate(d0_lo=signif(d0_lo,digits=precision), d0_hi = signif(d0_hi, digits=precision))
R <- R %>% mutate(cover=ifelse( dd < d0_lo | dd > d0_hi, F, T))
R <- R %>% dplyr::group_by(coverage,replicates) %>% dplyr::summarize(n=n(),hits=sum(cover==TRUE), misses=sum(cover==FALSE)) %>% mutate(cp=1-(misses/n)) %>% ungroup()
## `summarise()` regrouping output by 'coverage' (override with `.groups` argument)
stab2 <- R %>% mutate(coverage=as.factor(coverage)) %>% mutate(replicates=as.factor(replicates))
print(xtable(stab2, type = "latex"), file = "tables/stab2_ci_deltadelta_table.tex")
stab2
## # A tibble: 15 x 6
## coverage replicates n hits misses cp
## <fct> <fct> <int> <int> <int> <dbl>
## 1 30 5 50000 46242 3758 0.925
## 2 30 10 50000 46456 3544 0.929
## 3 30 20 50000 46568 3432 0.931
## 4 40 5 50000 46297 3703 0.926
## 5 40 10 50000 46466 3534 0.929
## 6 40 20 50000 46700 3300 0.934
## 7 50 5 50000 46295 3705 0.926
## 8 50 10 50000 46451 3549 0.929
## 9 50 20 50000 46769 3231 0.935
## 10 100 5 50000 46593 3407 0.932
## 11 100 10 50000 46880 3120 0.938
## 12 100 20 50000 47035 2965 0.941
## 13 200 5 50000 46874 3126 0.937
## 14 200 10 50000 47171 2829 0.943
## 15 200 20 50000 47480 2520 0.950
load("data/simulations/nullmodel_random_19.Rda")
custom.col <- c( "#D16103", "#4E84C4")
R <- Q %>% select(delta1,delta2,d0_pll,d0_lo,d0_hi) %>% slice_head(n=100) %>% mutate(dd = delta1-delta2)
R <- R %>% mutate(center=signif(d0_pll-dd,digits=4), lo=signif(d0_lo-dd,digits=4), hi=signif(d0_hi-dd,digits=4))
R <- R %>% mutate(cover=ifelse((hi < 0 | lo > 0), F, T))
p11 <- ggplot(R, aes(x=1:100, y = center,color=cover)) +
geom_point(size = 1) +
geom_errorbar(aes(ymax = hi, ymin = lo)) + scale_color_manual(values=custom.col) +
coord_cartesian(ylim = c(-0.3, 0.3)) +
labs(x = "sample") +
#labs(y=expression("simulated"~Delta*Delta~" (zero centered)"),colour= "overlap") +
labs(y="simulated diff. (zero centered)",colour= "overlap") +
geom_hline(yintercept=0,linetype="dashed",color="black",size=0.8) +
annotate("text", label=paste0("mean read cov. 30 (5 rep.)"),
x=25,y=0.24,family = "CM Sans", color="black",size=4,alpha=0.6) +
theme(plot.caption = element_text(hjust = 0, face= "italic"),
plot.title.position = "plot",
strip.background = element_rect(colour="white", fill="#FFFFFF"),
text = element_text(family = "CM Sans", size=12),
plot.title = element_text(hjust = 0.2, vjust=-1, size=10),
legend.position="none", panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
plot.margin=unit(c(15, 5.5, 5.5, 5.5), "points"))
p11
### Figure 3b: Density of estimates of differences in hitting and missing confidence intervals
load("data/simulations/nullmodel_random_19.Rda")
custom.col <- c( "#D16103", "#4E84C4")
R <- Q %>% select(coverage,delta1,delta2,d0_pll,d0_lo,d0_hi) %>% mutate(dd = delta1-delta2)
R <- R %>% mutate(center=signif(d0_pll-dd,digits=4), lo=signif(d0_lo-dd,digits=4), hi=signif(d0_hi-dd,digits=4))
R <- R %>% mutate(cover=ifelse((hi < 0 | lo > 0), F, T))
R <- R %>% filter(coverage %in% c(30,40,100,200))
p12 <- ggplot(R, aes(x=center,group=cover,fill=cover,colour=cover))+
geom_density(alpha=0.5,color=NA) + scale_fill_manual(values=custom.col) +
labs(fill="CI overlap") +
geom_vline(xintercept=0,linetype="dashed",color="black",size=0.8) +
annotate("text", label=paste0("n=600K"),
x=-0.25,y=8,family = "CM Sans", color="black",size=4,alpha=0.6) +
theme(legend.position = c(0.7, 0.85), legend.direction="vertical", panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.title.y=element_blank(),axis.text.y=element_blank(), axis.ticks.y=element_blank(), plot.margin=unit(c(15, 5.5, 5.5, 5.5), "points")) + coord_flip(xlim=c(-0.3,0.3),ylim=c(0,16))
p12
tmp <- ggarrange(p11, p12, nrow = 1, widths=c(6,2),vjust=1)
tmp
load("data/simulations/nullmodel_random_19.Rda")
custom.col <- c( "#C4961A", "#D16103", "#4E84C4", "#FFDB6D", "#52854C", "#F4EDCA",
"#C3D7A4", "#52854C", "#4E84C4", "#293352")
precision = 2
R <- Q %>% select(coverage,replicates,delta1,delta2,d0_pll,d0_lo,d0_hi) %>% mutate(dd = signif(delta1-delta2, digits=precision))
R <- R %>% mutate(d0_lo=signif(d0_lo,digits=precision), d0_hi = signif(d0_hi, digits=precision))
R <- R %>% mutate(cover=ifelse( dd < d0_lo | dd > d0_hi, F, T))
R <- R %>% filter(coverage %in% c(30,40,100,200))
R <- R %>% dplyr::group_by(coverage,replicates) %>% dplyr::summarize(n=n(),hits=sum(cover==TRUE), misses=sum(cover==FALSE)) %>% mutate(cp=1-(misses/n)) %>% ungroup()
## `summarise()` regrouping output by 'coverage' (override with `.groups` argument)
R <- R %>% mutate(coverage=as.factor(coverage)) %>% mutate(replicates=as.factor(replicates))
p13<-ggplot(R, aes(x=coverage, y = cp, group=replicates, color=replicates)) +
geom_line() + geom_point() +
labs(y="CI coverage probability", x="mean read coverage")+
scale_colour_manual(values=custom.col) +
geom_hline(yintercept=0.95, linetype="dashed", alpha=0.6) +
scale_y_continuous(position = "right", limits=c(0.92,0.952)) +
annotate("text", label=paste0("n=50K/pt."),
x=3.5,y=0.923,family = "CM Sans", color="black",size=4,alpha=0.6) +
theme(plot.caption = element_text(hjust = 0, face= "italic"),
plot.title.position = "plot",
strip.background = element_rect(colour="white", fill="#FFFFFF"),
text = element_text(family = "CM Sans", size=12),
plot.title = element_text(hjust = 0.2, vjust=-1, size=10),
legend.position = c(0.20, 0.7), panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
axis.title.y.right = element_text(vjust=2.5), plot.margin=unit(c(15, 5.5, 5.5, 5.5), "points"))
p13
fig3.top <- ggarrange(p11, p12, p13, nrow = 1, widths=c(6,2,4),labels=c("a.","b.","c."),label.x=c(0.08,-0.07,-0.04),vjust=3,hjust=-1.8)
fig3.top
load("data/simulations/nullmodel_grid_19.Rda")
load("data/simulations/simulation_foreground_4.Rda")
M <- R %>% group_by(coverage,replicates,combination) %>% mutate(padj=p.adjust(p,method="fdr"))
M <- M %>% arrange(padj) %>% mutate(total=1:n(), truepositive =cumsum(diff), falsepositive=total-truepositive, fdr=falsepositive/total, precision=truepositive/total, fpr=falsepositive/5000, recall=truepositive/500) %>% ungroup()
replicatenames <- paste(c(5,10,20),"replicates")
MM <- M %>% filter(combination %in% c(3,4,8,9,13,14)) %>%
mutate(background=case_when((combination %in% c(3,4)) ~ "0.5/0.5",
(combination %in% c(8,9)) ~ "0.9/0.9",
(combination %in% c(13,14)) ~ "0.1/0.1"))
MM <- MM %>% mutate(difference=case_when((combination %in% c(3,8,13)) ~ "0.2",
(combination %in% c(4,9,14)) ~ "0.25"))
MM <- MM %>% mutate(replicates=factor(replicates,levels=c(5,10,20), labels=replicatenames))
MM <- MM %>% mutate(coverage=factor(coverage))
MM <- MM %>% select(diff,difference,background,coverage,replicates,padj,precision,recall,fdr,fpr,total)
MM$nester <- ifelse(MM$background == "0.9/0.9", "background (5modC/5mC): 0.9/0.9",
ifelse(MM$background == "0.1/0.1", "background (5modC,5mC): 0.1/0.1", "background (5modC/5mC): 0.5/0.5"))
highlight_df <- MM %>% group_by(coverage, replicates, background, difference, nester) %>% filter(padj >= 0.1) %>% slice_head()
highlight_df <- highlight_df %>% mutate(recall=ifelse(padj>0.12,NA,recall))
custom.col <- c( "#C4961A", "#D16103", "#4E84C4", "#FFDB6D", "#52854C", "#F4EDCA",
"#C3D7A4", "#52854C", "#4E84C4", "#293352")
p14 <- ggplot(MM, aes(x = recall, y = fdr, color=coverage)) + geom_line(size=1) +
geom_hline(yintercept=0.10, linetype="dashed") +
geom_point(data=highlight_df, aes(x=recall,y=fdr, color=coverage), alpha=0.5, size=3) +
geom_text(data=highlight_df,aes(x=recall,y=fdr,label=signif(padj,digits=2)),hjust=0.5,vjust=-0.5,color="black") +
scale_colour_manual(values=custom.col) +
ggh4x::facet_nested(replicates ~ nester+difference, nest_line = TRUE) +
labs(title= "difference in hydroxymethylation", y = "false discovery rate", x="recall",color="mean cov.") +
scale_x_continuous(breaks=c(0.25,0.5, 0.75)) +
theme(plot.caption = element_text(hjust = 0, face= "italic"),
plot.title.position = "plot",
strip.background = element_rect(colour="white", fill="#FFFFFF"),
legend.background = element_rect(color = NA,fill=alpha('white', 0.0)),
text = element_text(family = "CM Sans", size=12),
plot.title = element_text(hjust = 0.5, size=12),
axis.text.x = element_text(size=8),
legend.position = c(0.91, 0.18),
panel.grid.minor = element_blank(),plot.margin=unit(c(15, 5.5, 5.5, 5.5), "points")
)
p14
fig3 <- ggarrange(fig3.top, p14, nrow = 2, widths=c(9), heights=c(3.5,5.5),labels=c("","d."),label.x=(0.07))
fig3
ggsave(filename = "figures/fig3.png", plot = fig3, device = "png")
## Saving 9 x 9 in image
ggsave(filename = "figures/fig3.svg", plot = fig3, device = "svg")
## Saving 9 x 9 in image
ggsave(filename = "figures/fig3.noembed.pdf", plot= fig3, device="pdf")
## Saving 9 x 9 in image
embed_fonts("figures/fig3.noembed.pdf", outfile="figures/fig3.pdf")
stab3 <- MM %>% group_by(background,difference,coverage,replicates) %>% filter(fdr >= 0.1) %>% slice_head(n=1) %>% select(difference,background,coverage,replicates,recall,fdr) %>% ungroup()
stab3$replicates <- stab3$replicates %>% fct_relabel(~gsub(" replicates", "", .x))
print(xtable(stab3, type = "latex"), file = "tables/stab3_recall_fdr_table.tex")
stab3
## # A tibble: 54 x 6
## difference background coverage replicates recall fdr
## <chr> <chr> <fct> <fct> <dbl> <dbl>
## 1 0.2 0.1/0.1 30 5 0.826 0.100
## 2 0.2 0.1/0.1 30 10 0.992 0.101
## 3 0.2 0.1/0.1 30 20 1 0.101
## 4 0.2 0.1/0.1 40 5 0.946 0.101
## 5 0.2 0.1/0.1 40 10 0.998 0.101
## 6 0.2 0.1/0.1 40 20 1 0.101
## 7 0.2 0.1/0.1 100 5 1 0.101
## 8 0.2 0.1/0.1 100 10 1 0.101
## 9 0.2 0.1/0.1 100 20 1 0.101
## 10 0.25 0.1/0.1 30 5 0.958 0.101
## # ... with 44 more rows
custom.col <- c( "#C4961A", "#D16103", "#4E84C4", "#FFDB6D", "#52854C", "#F4EDCA",
"#C3D7A4", "#52854C", "#4E84C4", "#293352")
set.seed(789)
tmp <- MM %>% select(difference,background,coverage,replicates,padj,fdr) %>% group_by(difference,background,coverage,replicates) %>% slice_sample(n=500)
p15 <- ggplot(tmp, aes(x=padj,y=fdr,color=coverage)) + geom_point(size=0.3) + scale_colour_manual(values=custom.col) +
ggh4x::facet_nested(replicates ~ background+difference, nest_line = TRUE) +
labs(title= "difference in hydroxymethylation", y = "empirical false discovery rate", x="hydi fdr-adjusted pvalue",color="mean cov.") +
scale_x_continuous(breaks=c(0.25,0.5, 0.75)) +
theme(plot.caption = element_text(hjust = 0, face= "italic"),
plot.title.position = "plot",
strip.background = element_rect(colour="white", fill="#FFFFFF"),
legend.background = element_rect(color = NA,fill=alpha('white', 0.0)),
text = element_text(family = "CM Sans", size=12),
plot.title = element_text(hjust = 0.5, size=12),
axis.text.x = element_text(size=8),
legend.position = c(0.91, 0.18),
panel.grid.minor = element_blank(),plot.margin=unit(c(15, 5.5, 5.5, 5.5), "points")
)
p15
ggsave(filename = "figures/sfig5.png", plot = p15, device = "png")
## Saving 9 x 9 in image
ggsave(filename = "figures/sfig5.svg", plot = p15, device = "svg")
## Saving 9 x 9 in image
ggsave(filename = "figures/sfig5.noembed.pdf", plot= p15, device="pdf")
## Saving 9 x 9 in image
embed_fonts("figures/sfig5.noembed.pdf", outfile="figures/sfig5.pdf")
load("data/simulations/nullmodel_grid_19.Rda")
load("data/simulations/simulation_foreground_4.Rda")
M <- R %>% group_by(coverage,replicates,combination) %>% mutate(padj=p.adjust(p,method="fdr"))
M <- M %>% arrange(padj) %>% mutate(total=1:n(), truepositive =cumsum(diff), falsepositive=total-truepositive, fdr=falsepositive/total, precision=truepositive/total, fpr=falsepositive/5000, recall=truepositive/500) %>% ungroup()
replicatenames <- paste(c(5,10,20),"replicates")
MM <- M %>% filter(combination %in% c(1,2,5,6,7,10,11,12,15)) %>%
mutate(background=case_when((combination %in% c(1,2,5)) ~ "0.5/0.5",
(combination %in% c(6,7,10)) ~ "0.1/0.1",
(combination %in% c(11,12,15)) ~ "0.9/0.9"))
MM <- MM %>% mutate(difference=case_when((combination %in% c(1,6,11)) ~ "0.1",
(combination %in% c(2,7,12)) ~ "0.15",
(combination %in% c(5,10,15)) ~ "0.3"))
MM <- MM %>% mutate(replicates=factor(replicates,levels=c(5,10,20), labels=replicatenames))
MM <- MM %>% mutate(coverage=factor(coverage))
MM <- MM %>% select(diff,difference,background,coverage,replicates,padj,precision,recall,fdr,fpr,total)
MM$nester <- MM$background
highlight_df <- MM %>% group_by(coverage, replicates, background, difference, nester) %>% filter(padj >= 0.1) %>% slice_head()
highlight_df <- highlight_df %>% mutate(recall=ifelse(padj>0.11,NA,recall))
custom.col <- c( "#C4961A", "#D16103", "#4E84C4", "#FFDB6D", "#52854C", "#F4EDCA",
"#C3D7A4", "#52854C", "#4E84C4", "#293352")
p16 <- ggplot(MM, aes(x = recall, y = fdr, color=coverage)) + geom_line(size=1) +
geom_hline(yintercept=0.10, linetype="dashed") +
geom_point(data=highlight_df, aes(x=recall,y=fdr, color=coverage), alpha=0.5, size=3) +
geom_text(data=highlight_df,aes(x=recall,y=fdr,label="0.1"),hjust=0.5,vjust=-0.5,color="black") +
scale_colour_manual(values=custom.col) +
ggh4x::facet_nested(replicates ~ nester+difference, nest_line = TRUE) +
labs(title= "difference in hydroxymethylation", y = "false discovery rate", x="recall",color="cov.") +
scale_x_continuous(breaks=c(0.25,0.5, 0.75)) +
theme(plot.caption = element_text(hjust = 0, face= "italic"),
plot.title.position = "plot",
strip.background = element_rect(colour="white", fill="#FFFFFF"),
legend.background = element_rect(color = NA,fill=alpha('white', 0.0)),
text = element_text(family = "CM Sans", size=12),
plot.title = element_text(hjust = 0.5, size=12),
axis.text.x = element_text(size=8, angle=45),
legend.position = c(0.94, 0.18),
panel.grid.minor = element_blank(),plot.margin=unit(c(15, 5.5, 5.5, 5.5), "points")
)
p16
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_text).
ggsave(filename = "figures/sfig6.png", plot = p16, device = "png")
## Saving 7 x 5 in image
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_text).
ggsave(filename = "figures/sfig6.svg", plot = p16, device = "svg")
## Saving 7 x 5 in image
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_text).
ggsave(filename = "figures/sfig6.noembed.pdf", plot = p16, device= "pdf")
## Saving 7 x 5 in image
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_text).
embed_fonts("figures/sfig6.noembed.pdf", outfile="figures/sfig6.pdf")
stab4 <- MM %>% group_by(background,difference,coverage,replicates) %>% filter(fdr >= 0.1) %>% slice_head(n=1) %>% select(difference,background,coverage,replicates,recall,fdr) %>% ungroup()
stab4$replicates <- stab4$replicates %>% fct_relabel(~gsub(" replicates", "", .x))
print(xtable(stab4, type = "latex"), file = "tables/stab4_recall_fdr_table.tex")
stab4
## # A tibble: 81 x 6
## difference background coverage replicates recall fdr
## <chr> <chr> <fct> <fct> <dbl> <dbl>
## 1 0.1 0.1/0.1 30 5 0.01 0.167
## 2 0.1 0.1/0.1 30 10 0.396 0.1
## 3 0.1 0.1/0.1 30 20 0.852 0.101
## 4 0.1 0.1/0.1 40 5 0.15 0.107
## 5 0.1 0.1/0.1 40 10 0.622 0.101
## 6 0.1 0.1/0.1 40 20 0.968 0.100
## 7 0.1 0.1/0.1 100 5 0.828 0.1
## 8 0.1 0.1/0.1 100 10 0.996 0.101
## 9 0.1 0.1/0.1 100 20 1 0.101
## 10 0.15 0.1/0.1 30 5 0.364 0.103
## # ... with 71 more rows
load("data/simulations/nullmodel_grid_19.Rda")
load("data/simulations/simulation_foreground_4.Rda")
M <- R %>% group_by(coverage,replicates,combination) %>% mutate(padj=p.adjust(p,method="fdr"))
M <- M %>% arrange(padj) %>% mutate(total=1:n(), truepositive =cumsum(diff), falsepositive=total-truepositive, fdr=falsepositive/total, precision=truepositive/total, fpr=falsepositive/5000, recall=truepositive/500) %>% ungroup()
replicatenames <- paste(c(5,10,20),"replicates")
MM <- M %>% filter(combination %in% c(3,4,8,9,13,14)) %>%
mutate(background=case_when((combination %in% c(3,4)) ~ "0.5/0.5",
(combination %in% c(8,9)) ~ "0.9/0.9",
(combination %in% c(13,14)) ~ "0.1/0.1"))
MM <- MM %>% mutate(difference=case_when((combination %in% c(3,8,13)) ~ "0.2",
(combination %in% c(4,9,14)) ~ "0.25"))
MM <- MM %>% mutate(replicates=factor(replicates,levels=c(5,10,20), labels=replicatenames))
MM <- MM %>% mutate(coverage=factor(coverage))
MM <- MM %>% select(diff,difference,background,coverage,replicates,padj,precision,recall,fdr,fpr,total)
MM$nester <- ifelse(MM$background == "0.9/0.9", "background (5modC/5mC): 0.9/0.9",
ifelse(MM$background == "0.1/0.1", "background (5modC,5mC): 0.1/0.1", "background (5modC/5mC): 0.5/0.5"))
highlight_df <- MM %>% group_by(coverage, replicates, background, difference, nester) %>% filter(padj >= 0.1) %>% slice_head()
highlight_df <- highlight_df %>% mutate(recall=ifelse(padj>0.12,NA,recall))
p17 <- ggplot(MM, aes(d = diff, m = padj, color=coverage)) +
geom_roc(increasing=FALSE, n.cuts=0) +
geom_point(data=highlight_df,aes(x=fpr,y=recall, color=coverage), alpha=0.5, size=3) +
geom_text(data=highlight_df,aes(x=fpr,y=recall,label="0.1"),hjust=-0.5,vjust=0,color="black") +
style_roc() +
scale_colour_manual(values=custom.col) +
ggh4x::facet_nested(replicates ~ nester+difference, nest_line = TRUE) +
labs(title= "difference in hydroxmethylation") +
theme(plot.caption = element_text(hjust = 0, face= "italic"),
plot.title.position = "plot",
strip.background = element_rect(colour="white", fill="#FFFFFF"),
text = element_text(family = "CM Sans", size=12),
plot.title = element_text(hjust = 0.45, size=12),
legend.position = c(0.75, 0.18),
legend.background = element_rect(linetype = 2, size = 0.3, color=1, fill=alpha('white', 0.5)),
)
p17
ggsave(filename = "figures/sfig7.png", plot = p17, device = "png")
## Saving 7 x 5 in image
ggsave(filename = "figures/sfig7.svg", plot = p17, device = "svg")
## Saving 7 x 5 in image
ggsave(filename = "figures/sfig7.noembed.pdf", plot = p17, device= "pdf")
## Saving 7 x 5 in image
embed_fonts("figures/sfig7.noembed.pdf", outfile="figures/sfig7.pdf")
custom.col <- c( "#C4961A", "#D16103", "#4E84C4", "#FFDB6D", "#52854C", "#F4EDCA",
"#C3D7A4", "#52854C", "#4E84C4", "#293352")
rss <- read_delim("data/simulations/pidstat.maxrsssummary.csv", delim="\t")
## Parsed with column specification:
## cols(
## replicates = col_double(),
## coverage = col_double(),
## N = col_double(),
## VSZ = col_double(),
## RSS = col_double(),
## t = col_double()
## )
rss <- rss %>%mutate(replicates=as.factor(paste0(replicates," replicates")), coverage=as.factor(paste0("mean cov. ",coverage)), RSS=(RSS*1000)/1000000, N=N/1000 )
rss <- rss %>% mutate(replicates = factor(replicates,levels=c("5 replicates", "10 replicates","20 replicates")))
rss <- rss %>% mutate(coverage = factor(coverage,levels=c("mean cov. 30", "mean cov. 40","mean cov. 100", "mean cov. 200")))
p18 <- ggplot(rss, aes(x=N, y=RSS, color=coverage)) + geom_line(alpha=0.5) + geom_point(alpha=0.5) + scale_colour_manual(values=custom.col) + labs(y="RSS (MB)") + scale_x_continuous(name ="N (x1000)",
breaks=c(10,100,500)) + theme(text = element_text(family = "CM Sans", size=11), legend.position="none") + facet_grid(replicates~coverage,scales='free')
p18
spd <- read_delim("data/simulations/hyperfine.summary.csv", delim="\t")
## Parsed with column specification:
## cols(
## replicates = col_double(),
## coverage = col_double(),
## N = col_double(),
## mean = col_double(),
## stddev = col_double(),
## median = col_double(),
## user = col_double(),
## system = col_double(),
## min = col_double(),
## max = col_double()
## )
spd <- spd %>%mutate(replicates=as.factor(paste0(replicates," replicates")), coverage=as.factor(paste0("mean cov. ",coverage)), N=N/1000 )
spd <- spd %>% mutate(replicates = factor(replicates,levels=c("5 replicates", "10 replicates","20 replicates")))
spd <- spd %>% mutate(coverage = factor(coverage,levels=c("mean cov. 30", "mean cov. 40","mean cov. 100", "mean cov. 200")))
p19 <- ggplot(spd, aes(x=N, y=mean, color=coverage)) + geom_line(alpha=0.5) + geom_point(alpha=0.5) + scale_colour_manual(values=custom.col) + scale_y_continuous(name="mean run time (seconds)",breaks=c(0,50,100,150,200,250,300,350)) + scale_x_continuous(name ="N (x1000)",
breaks=c(10,100,500)) + theme(text = element_text(family = "CM Sans", size=11), legend.position="none") + facet_grid(replicates~coverage)
p19
p20 <- ggplot(spd, aes(x=replicates, y=mean/(N), color=replicates)) + geom_boxplot() + geom_point(alpha=0.5) + labs(x="", y="runtime per CpG (ms)") + theme(text = element_text(family = "CM Sans", size=11), legend.position="none") + scale_colour_manual(values=custom.col)
p20
p21 <- ggplot(spd, aes(x=coverage, y=mean/(N), color=coverage)) + geom_boxplot() + geom_point(alpha=0.5) + labs(x="", y="runtime per CpG (ms)") + theme(text = element_text(family = "CM Sans", size=11), legend.position="none") + scale_colour_manual(values=custom.col)
p21
#### Export Supplemental Figure 7
sfig8.top <- ggarrange(p18, p19, nrow = 2, widths=c(10), heights=c(10,10),labels=c("a.","b."))
sfig8.top
sfig8.bot <- ggarrange(p20, p21, nrow = 1, widths=c(5),heights=c(2),labels=c("c.","d."))
sfig8.bot
sfig8 <- ggarrange(sfig8.top,sfig8.bot,nrow=2,heights=c(10,5))
sfig8
ggsave(filename = "figures/sfig8.png", plot = sfig8, device = "png")
## Saving 9 x 9 in image
ggsave(filename = "figures/sfig8.svg", plot = sfig8, device = "svg")
## Saving 9 x 9 in image
ggsave(filename = "figures/sfig8.noembed.pdf", plot = sfig8, device= "pdf")
## Saving 9 x 9 in image
embed_fonts("figures/sfig8.noembed.pdf", outfile="figures/sfig8.pdf")